Lab 4

Author

Allison Louie

library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(leaflet)
library(R.utils)
Loading required package: R.oo
Loading required package: R.methodsS3
R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
R.oo v1.25.0 (2022-06-12 02:20:02 UTC) successfully loaded. See ?R.oo for help.

Attaching package: 'R.oo'
The following object is masked from 'package:R.methodsS3':

    throw
The following objects are masked from 'package:methods':

    getClasses, getMethods
The following objects are masked from 'package:base':

    attach, detach, load, save
R.utils v2.12.2 (2022-11-11 22:00:03 UTC) successfully loaded. See ?R.utils for help.

Attaching package: 'R.utils'
The following object is masked from 'package:utils':

    timestamp
The following objects are masked from 'package:base':

    cat, commandArgs, getOption, isOpen, nullfile, parse, warnings

#1 Read in the data

# Fetch the compressed MET data set (only if it is not already on disk) and
# load it as a data.table.
# download.file() has no `timeout` argument -- extra arguments are ignored --
# so the 60-second limit is set through the `timeout` option instead.
# Delete "met_all.gz" to force a fresh download.
met_url  <- "https://raw.githubusercontent.com/USCbiostats/data-science-data/master/02_met/met_all.gz"
met_file <- "met_all.gz"

if (!file.exists(met_file)) {
  options(timeout = 60)
  download.file(met_url, destfile = met_file, method = "libcurl")
}

met <- data.table::fread(met_file)

#2 Prepare the data

# Keep observations with temp >= -17 (rows with a missing temp are dropped by
# the subset) and recode the elevation value 9999 as NA.
met <- met[temp >= -17]
met[elev == 9999.0, elev := NA]

# Week of the year ("%U") derived from the year/month/day columns; only
# weeks 31 and 32 are kept.
obs_date <- as.Date(paste(met$year, met$month, met$day, sep = "-"))
met[, week := as.integer(format(obs_date, "%U"))]
met <- met[week %in% c(31L, 32L)]

# Average every weather variable per station (USAFID) and day.
# lat and lon are averaged without na.rm, so a missing coordinate propagates.
met_avg <- met[
  ,
  list(
    temp      = mean(temp, na.rm = TRUE),
    rh        = mean(rh, na.rm = TRUE),
    wind.sp   = mean(wind.sp, na.rm = TRUE),
    vis.dist  = mean(vis.dist, na.rm = TRUE),
    dew.point = mean(dew.point, na.rm = TRUE),
    lat       = mean(lat),
    lon       = mean(lon),
    elev      = mean(elev, na.rm = TRUE)
  ),
  by = .(USAFID, day)
]

# Assign each station-day to a quadrant split at lon = -98 and lat = 39.71.
# lon >= -98 is the eastern half and lat >= 39.71 is the northern half, so
# east + south is "SE" and west + north is "NW" (these two labels were
# previously swapped). A missing lat or lon yields NA.
met_avg <- mutate(met_avg, region = ifelse(lon >= -98 & lat >= 39.71, "NE",
                            ifelse(lon >= -98 & lat < 39.71, "SE",
                            ifelse(lon < -98 & lat >= 39.71, "NW", "SW"))))

# Elevation category: "high" when the mean elevation exceeds 252, else "low".
met_avg <- met_avg %>%
  mutate(elev_cat = ifelse(elev > 252, "high", "low"))

#3 Use geom_violin to examine the wind speed and dew point by region

# Drop every row that has a missing value in any column before plotting.
met_avg <- na.omit(met_avg)

# One panel per region: wind speed is drawn at x = 1 and dew point at x = 2,
# both on a shared y axis.
ggplot(met_avg, aes(x = 1, y = wind.sp)) +
  geom_violin(
    aes(fill = region),
    scale = "width",
    width = 0.8,
    na.rm = TRUE
  ) +
  geom_violin(
    aes(x = 2, y = dew.point, fill = region),
    scale = "width",
    width = 0.8,
    na.rm = TRUE
  ) +
  facet_wrap(~region, nrow = 1) +
  scale_x_continuous(breaks = c(1, 2), labels = c("Wind Speed", "Dew Point")) +
  labs(x = NULL, y = "Value", title = "Wind Speed and Dew Point by Region")

Wind speed and dew point appear to have an inverse relationship: dew points are higher where wind speeds are lower. The southwest also appears to have more dew overall than the other regions, with dew point values spread over a large range.

#4 Use geom_jitter with stat_smooth to examine the association between dew point and wind speed by region

# Jittered scatter of wind speed against dew point, with one smoother per
# region (the colour aesthetic splits both layers by region).
# na.rm is a layer parameter, not an aesthetic, so it is passed to the layers
# rather than to aes().
ggplot(met_avg, aes(x = dew.point, y = wind.sp, color = region)) +
  geom_jitter(na.rm = TRUE) +
  stat_smooth(na.rm = TRUE) +
  labs(x = "Dew Point", y = "Wind Speed") +
  ggtitle("Association Between Dew Point and Wind Speed by Region")
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#5 Use geom_bar to create barplots of the weather stations by elevation category colored by region

# met_avg has one row per station and day, so counting its rows would count
# station-days. Reduce it to one row per station / elevation category /
# region so that each bar counts weather stations.
stations <- unique(
  as.data.frame(met_avg)[, c("USAFID", "elev_cat", "region")]
)

ggplot(stations, aes(x = elev_cat, fill = region)) +
  geom_bar(position = "dodge", na.rm = TRUE) +
  scale_fill_brewer(palette = "PiYG") +
  labs(x = "Elevation Levels", y = "Count", fill = "Region") +
  ggtitle("Weather Stations by Elevation Levels and Region")

In the graph, we can see that the northwest has a lot of weather stations at the lower elevation level. At the higher elevation level, the northeast has a lot of weather stations, but not as many as the northwest has at the lower elevation level. At both elevation levels, the southeast has the fewest weather stations.

#6 Use stat_summary to examine mean dew point and wind speed by region with standard deviation error bars

# Mean dew point (black) and mean wind speed (blue) per region, with error
# bars of one standard deviation around each mean.
# mean_sdl() multiplies the SD by `mult`, which defaults to 2, so mult = 1 is
# passed explicitly. The points are the regional means (stat_summary with
# fun = mean) rather than every raw observation.
ggplot(met_avg, aes(x = region)) +
  stat_summary(aes(y = dew.point), fun = mean, geom = "point") +
  stat_summary(
    aes(y = dew.point),
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),
    geom = "errorbar"
  ) +
  stat_summary(aes(y = wind.sp), fun = mean, geom = "point", color = "blue") +
  stat_summary(
    aes(y = wind.sp),
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),
    geom = "errorbar",
    color = "blue"
  ) +
  labs(
    x = "Region",
    y = "Mean Value",
    title = "Mean Dew Point and Wind Speed by Region with Error Bars"
  ) +
  theme_minimal()

Dew point is spread across a wider range of values and has a large variance. Wind speed has a shorter range and a smaller variance in comparison. The only regions where wind speeds fall within the range of dew points are the southeast and southwest.

#7 Make a map showing the spatial trend in relative humidity in the US

library(leaflet)
# One row per station: mean relative humidity and mean coordinates.
# Previously the columns were selected without aggregating, which kept every
# raw observation for each station.
met_avg2 <- met[, .(
  rh  = mean(rh, na.rm = TRUE),
  lat = mean(lat, na.rm = TRUE),
  lon = mean(lon, na.rm = TRUE)
), by = c("USAFID")]
# A station with no rh readings averages to NaN, which is.na() also catches.
met_avg2 <- met_avg2[!is.na(rh)]

# Numeric colour scale spanning the observed station-level humidity values.
rh.pal <- colorNumeric(c('blue','green', 'yellow', 'red'), domain = met_avg2$rh)
rh.pal
function (x) 
{
    if (length(x) == 0 || all(is.na(x))) {
        return(pf(x))
    }
    if (is.null(rng)) 
        rng <- range(x, na.rm = TRUE)
    rescaled <- scales::rescale(x, from = rng)
    if (any(rescaled < 0 | rescaled > 1, na.rm = TRUE)) 
        warning("Some values were outside the color scale and will be treated as NA")
    if (reverse) {
        rescaled <- 1 - rescaled
    }
    pf(rescaled)
}
<bytecode: 0x7fbc89d96df0>
<environment: 0x7fbc89d948b0>
attr(,"colorType")
[1] "numeric"
attr(,"colorArgs")
attr(,"colorArgs")$na.color
[1] "#808080"
# Ten stations with the highest relative humidity.
# rank() gives tied values their average rank, so several stations tied at the
# maximum can all rank above 10 and leave the selection empty. Sorting and
# taking the first ten rows always returns (up to) ten stations. The
# aggregate() step collapses met_avg2 to one row per station in case it still
# holds several rows per station.
station_rh <- aggregate(
  cbind(rh, lat, lon) ~ USAFID,
  data = met_avg2,
  FUN = mean
)
top_10 <- head(station_rh[order(station_rh$rh, decreasing = TRUE), ], 10)

# Map: one coloured circle per row of met_avg2, default pin markers on the
# top ten, and a legend for the colour scale.
# The markers use the default pin: `icon` is an argument of addMarkers(), not
# an option of markerOptions(), and the icon supplied before had no image URL.
rh.map <- leaflet(met_avg2) %>%
  addProviderTiles('CartoDB.Positron') %>%
  addCircles(
    lat = ~lat, lng = ~lon,
    label = ~paste0(round(rh,2), "%"), color = ~ rh.pal(rh),
    opacity = 1, fillOpacity = 1, radius = 5
  ) %>%
  addMarkers(
    data = top_10,
    lng = ~lon,
    lat = ~lat,
    label = ~paste("Top 10: ", round(rh,2), "%")
  ) %>%
  addLegend(
    "bottomright",
    pal = rh.pal,
    values = ~rh,
    title = "Relative Humidity (%)",
    opacity = 0.7
  )

rh.map

Across the US, relative humidity appears to be higher on the east coast (north and south) than on the west coast (north and south). Unfortunately, I was unable to get the markers for the top 10 to display, but I would assume they would be more towards the east since there is a lot of humidity there.

#8 Use a ggplot extension

library(ggforce)
# Elevation against elevation category, coloured by region, with a ggforce
# zoom panel on the x range covered by one region.
# The zoom condition must match a value that exists in `region`; "versicolor"
# (an iris species) matched no row. "NE" is used here -- swap in another
# region label to zoom elsewhere.
ggplot(met_avg, aes(elev, elev_cat, colour = region)) +
  geom_point() +
  facet_zoom(x = region == "NE")